package com.momo.demo11_utils;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * Extracts keywords from log text.
 *
 * <p>Terms are segmented with the IK analyzer, counted, then scored by
 * searching an in-memory Lucene index of the original text; the top five
 * terms by (score * frequency) are returned.
 */
public class KeyWordUtil {

    /**
     * Extracts up to five keywords from the given text.
     *
     * @param text the text to analyze; {@code null} or empty yields an empty list
     * @return the top keywords (at most five), best first; never {@code null}
     * @throws IOException if tokenization fails
     */
    public static List<String> getKeyWord(String text) throws IOException {
        List<String> keywords = new ArrayList<String>();
        if (text == null || text.equals("")) {
            return keywords;
        }

        // Segment the text and count term frequencies.
        Map<String, Integer> words = new HashMap<String, Integer>();
        Analyzer analyzer = new IKAnalyzer(true); // true = smart (coarse-grained) segmentation
        StringReader reader = new StringReader(text);
        TokenStream tokenStream = analyzer.tokenStream("*", reader);
        try {
            TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
            while (tokenStream.incrementToken()) {
                String word = termAtt.term();
                // Keep only multi-character terms whose GBK byte length exceeds 2
                // (filters out single CJK characters and short ASCII tokens).
                if (word.length() > 1 && strlen(word, "GBK") > 2) {
                    Integer count = words.get(word);
                    words.put(word, count == null ? 1 : count + 1);
                }
            }
        } finally {
            // Close even on IOException from incrementToken; StringReader.close() is safe.
            tokenStream.close();
            reader.close();
        }
        if (words.isEmpty()) {
            return keywords;
        }

        // Score each term against an in-memory index of the full text.
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            String fieldName = "text";
            dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                Document doc = new Document();
                doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            } finally {
                writer.close(); // ensure the writer is released even if addDocument throws
            }
            searcher = new IndexSearcher(dir);
            searcher.setSimilarity(new IKSimilarity());

            Map<String, Float> scores = new HashMap<String, Float>();
            for (Entry<String, Integer> wordEntry : words.entrySet()) {
                Query query = IKQueryParser.parse(fieldName, wordEntry.getKey());
                TopDocs topDocs = searcher.search(query, 1);
                if (topDocs.totalHits > 0) {
                    // Weight the Lucene relevance score by the term's frequency.
                    scores.put(wordEntry.getKey(), topDocs.getMaxScore() * wordEntry.getValue());
                }
            }

            // Take the five best-scoring keywords.
            for (Entry<String, Float> entry : getSortedHashtableByValue(scores)) {
                if (keywords.size() >= 5) {
                    break;
                }
                keywords.add(entry.getKey());
            }
        } catch (Exception e) {
            // Best effort: scoring failures degrade to an empty/partial result.
            e.printStackTrace();
        } finally {
            // Null checks prevent a secondary NPE from masking the real failure
            // when IndexWriter/IndexSearcher construction threw.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return keywords;
    }

    /**
     * Returns the byte length of {@code text} in the given charset.
     *
     * @param text        the text to measure; {@code null} or empty yields 0
     * @param charsetName the charset used to encode the text (e.g. "GBK")
     * @return the encoded byte length, or 0 if the charset is unsupported
     */
    public static int strlen(String text, String charsetName) {
        if (text == null || text.length() == 0) {
            return 0;
        }
        int length = 0;
        try {
            length = text.getBytes(charsetName).length;
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return length;
    }

    /**
     * Returns the map's entries sorted by value descending; ties are broken
     * by key ascending so the ordering is deterministic.
     *
     * @param h the score map to sort
     * @return the entries as an array, highest value first
     */
    @SuppressWarnings("unchecked")
    public static Entry<String, Float>[] getSortedHashtableByValue(Map<String, Float> h) {
        Set<Entry<String, Float>> set = h.entrySet();
        // Unchecked: generic array creation is impossible, so we cast Entry[].
        Entry<String, Float>[] entries = set.toArray(new Entry[set.size()]);
        Arrays.sort(entries, new Comparator<Entry<String, Float>>() {
            public int compare(Entry<String, Float> entry1, Entry<String, Float> entry2) {
                int byValue = entry2.getValue().compareTo(entry1.getValue());
                if (byValue != 0) {
                    return byValue;
                }
                return entry1.getKey().compareTo(entry2.getKey());
            }
        });
        return entries;
    }

    /**
     * Joins the elements of an array, a map's values, or a collection into a
     * single string separated by {@code separator}.
     *
     * @param data      an array, {@link Map} (values are joined), or
     *                  {@link Collection}; any other object is returned via
     *                  its {@code toString()}; {@code null} yields ""
     * @param separator the separator placed between elements
     * @return the joined string
     */
    @SuppressWarnings("unchecked")
    public static String implode(Object data, String separator) {
        if (data == null) {
            return "";
        }
        // StringBuilder: this method is single-threaded, no need for StringBuffer.
        StringBuilder out = new StringBuilder();
        if (data instanceof Object[]) {
            boolean flag = false;
            for (Object obj : (Object[]) data) {
                if (flag) {
                    out.append(separator);
                } else {
                    flag = true;
                }
                out.append(obj);
            }
        } else if (data instanceof Map) {
            Map temp = (Map) data;
            Set<Object> keys = temp.keySet();
            boolean flag = false;
            for (Object key : keys) {
                if (flag) {
                    out.append(separator);
                } else {
                    flag = true;
                }
                out.append(temp.get(key));
            }
        } else if (data instanceof Collection) {
            boolean flag = false;
            for (Object obj : (Collection) data) {
                if (flag) {
                    out.append(separator);
                } else {
                    flag = true;
                }
                out.append(obj);
            }
        } else {
            return data.toString();
        }
        return out.toString();
    }

    /**
     * Tokenizes {@code text} with the given analyzer and returns the terms.
     *
     * @param analyzer the analyzer to tokenize with
     * @param text     the text to tokenize
     * @return the produced terms, in stream order
     * @throws IOException if the token stream fails
     */
    public static String[] termsFormAnalysis(Analyzer analyzer, String text) throws IOException {
        TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
        // The attribute instance is stable for the stream's lifetime,
        // so fetch it once instead of once per token.
        TermAttribute termAttribute = (TermAttribute) stream.getAttribute(TermAttribute.class);
        List<String> termList = new ArrayList<String>();
        while (stream.incrementToken()) {
            termList.add(termAttribute.term());
        }
        return termList.toArray(new String[0]);
    }
}
